"""
获取搜狐网首页的新闻标题和链接
"""
import re

import requests

# Match <a ...> tags that carry an href followed by a title attribute.
# Group 1 is the href value, group 2 is the title text.
# The filler between attributes is `[^>]` rather than `.`, and the href value
# is `[^"]+`, so the filler cannot cross a tag boundary and pair one anchor's
# href with the title of a later anchor on the same line.
pattern = re.compile(
    r'<a\s[^>]*?href="([^"]+)"\s[^>]*?title=["\'](.+?)["\'][^>]*>'
)
# Without an explicit timeout, requests waits indefinitely on a stalled server.
resp = requests.get('https://www.sohu.com/', timeout=10)
# Any non-200 response is skipped silently: nothing is printed.
if resp.status_code == 200:
    results = pattern.findall(resp.text)
    for href, title in results:
        # Protocol-relative URLs ("//host/path") need an explicit scheme.
        if href.startswith('//'):
            href = 'https:' + href
        print(href, title)
